{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import time\n",
    "import datetime\n",
    "import itertools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>日期</th>\n",
       "      <th>顾客编号</th>\n",
       "      <th>分类</th>\n",
       "      <th>购买量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2021-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2021-01-02</td>\n",
       "      <td>1</td>\n",
       "      <td>E</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2021-01-06</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2021-01-10</td>\n",
       "      <td>1</td>\n",
       "      <td>C</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2021-01-10</td>\n",
       "      <td>1</td>\n",
       "      <td>E</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          日期  顾客编号 分类  购买量\n",
       "0 2021-01-01     1  A    4\n",
       "1 2021-01-02     1  E    1\n",
       "2 2021-01-06     1  A    4\n",
       "3 2021-01-10     1  C    4\n",
       "4 2021-01-10     1  E    3"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 导入数据集\n",
    "df_raw = pd.read_excel('./data.xlsx',sheet_name='Sheet4').iloc[:,:4]\n",
    "# df_raw['日期'] = pd.to_datetime(df_raw['日期'])\n",
    "# df_raw = df_raw.set_index('日期')\n",
    "df_raw.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('2021-01-23', '2020-12-25')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 获取间隔日期的最大、最小日期（最小日期往前推 7 日）\n",
    "date_max = datetime.datetime.strftime(df_raw.日期.max(), '%Y-%m-%d')\n",
    "date_min = datetime.datetime.strftime(df_raw.日期.min() - datetime.timedelta(days=7), '%Y-%m-%d')\n",
    "date_max,date_min"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>顾客编号</th>\n",
       "      <th>分类</th>\n",
       "      <th>日期</th>\n",
       "      <th>购买量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>2021-01-01</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>2021-01-06</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>2021-01-12</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>B</td>\n",
       "      <td>2021-01-13</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>C</td>\n",
       "      <td>2021-01-10</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   顾客编号 分类          日期  购买量\n",
       "0     1  A  2021-01-01    4\n",
       "1     1  A  2021-01-06    4\n",
       "2     1  A  2021-01-12    5\n",
       "3     1  B  2021-01-13    3\n",
       "4     1  C  2021-01-10    4"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对数据分组\n",
    "df_group = df_raw.groupby(['顾客编号','分类','日期']).agg('sum').reset_index()\n",
    "df_group = df_group.sort_values(by=['顾客编号','分类','日期'],ascending=True)\n",
    "df_group['日期'] = df_group['日期'].apply(lambda x:x.strftime('%Y-%m-%d'))\n",
    "df_group.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2020-12-25</td>\n",
       "      <td>2020-12-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2020-12-26</td>\n",
       "      <td>2020-12-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2020-12-27</td>\n",
       "      <td>2020-12-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2020-12-28</td>\n",
       "      <td>2020-12-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2020-12-29</td>\n",
       "      <td>2020-12-29</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       index          日期\n",
       "0 2020-12-25  2020-12-25\n",
       "1 2020-12-26  2020-12-26\n",
       "2 2020-12-27  2020-12-27\n",
       "3 2020-12-28  2020-12-28\n",
       "4 2020-12-29  2020-12-29"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 生成连续时间索引\n",
    "df = pd.DataFrame()\n",
    "df_index = pd.date_range(date_min,date_max)\n",
    "df = df.reindex(df_index).reset_index()\n",
    "df['日期'] = df['index'].apply(lambda x:x.strftime('%Y-%m-%d'))\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2020-12-25</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2020-12-25</td>\n",
       "      <td>1</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2020-12-25</td>\n",
       "      <td>1</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2020-12-25</td>\n",
       "      <td>1</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2020-12-25</td>\n",
       "      <td>1</td>\n",
       "      <td>E</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            0  1  2\n",
       "0  2020-12-25  1  A\n",
       "1  2020-12-25  1  B\n",
       "2  2020-12-25  1  C\n",
       "3  2020-12-25  1  D\n",
       "4  2020-12-25  1  E"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 构建全集\n",
    "df_custid = list(df_group.顾客编号.unique())\n",
    "df_catelog = list(df_group.分类.unique())\n",
    "df_date = list(df['日期'].unique())\n",
    "temp_list = []\n",
    "for x in itertools.product(df_date,df_custid,df_catelog):\n",
    "    temp_list.append(list(x))\n",
    "temp_df = pd.DataFrame(temp_list)\n",
    "temp_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>购买量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2020-12-25</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2020-12-26</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2020-12-27</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>2020-12-28</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>2020-12-29</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>389</th>\n",
       "      <td>2021-01-19</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>404</th>\n",
       "      <td>2021-01-20</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>419</th>\n",
       "      <td>2021-01-21</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>434</th>\n",
       "      <td>2021-01-22</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>449</th>\n",
       "      <td>2021-01-23</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>450 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              0  1  2  购买量\n",
       "0    2020-12-25  1  A  NaN\n",
       "15   2020-12-26  1  A  NaN\n",
       "30   2020-12-27  1  A  NaN\n",
       "45   2020-12-28  1  A  NaN\n",
       "60   2020-12-29  1  A  NaN\n",
       "..          ... .. ..  ...\n",
       "389  2021-01-19  3  E  NaN\n",
       "404  2021-01-20  3  E  NaN\n",
       "419  2021-01-21  3  E  NaN\n",
       "434  2021-01-22  3  E  NaN\n",
       "449  2021-01-23  3  E  NaN\n",
       "\n",
       "[450 rows x 4 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 左连接\n",
    "df_merge = pd.merge(temp_df,df_group, how='left', left_on=[1,2,0],right_on=['顾客编号','分类','日期'])\n",
    "df_merge.drop(columns=['顾客编号','分类','日期'],inplace=True)\n",
    "df_merge = df_merge.sort_values(by=[1,2,0],ascending=True)\n",
    "df_merge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>购买量</th>\n",
       "      <th>rolling_7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2020-12-25</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2020-12-26</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2020-12-27</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>2020-12-28</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>2020-12-29</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>389</th>\n",
       "      <td>2021-01-19</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>404</th>\n",
       "      <td>2021-01-20</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>419</th>\n",
       "      <td>2021-01-21</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>434</th>\n",
       "      <td>2021-01-22</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>449</th>\n",
       "      <td>2021-01-23</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>450 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              0  1  2  购买量  rolling_7\n",
       "0    2020-12-25  1  A  NaN        0.0\n",
       "15   2020-12-26  1  A  NaN        0.0\n",
       "30   2020-12-27  1  A  NaN        0.0\n",
       "45   2020-12-28  1  A  NaN        0.0\n",
       "60   2020-12-29  1  A  NaN        0.0\n",
       "..          ... .. ..  ...        ...\n",
       "389  2021-01-19  3  E  NaN        5.0\n",
       "404  2021-01-20  3  E  NaN        5.0\n",
       "419  2021-01-21  3  E  NaN        5.0\n",
       "434  2021-01-22  3  E  NaN        0.0\n",
       "449  2021-01-23  3  E  NaN        0.0\n",
       "\n",
       "[450 rows x 5 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 测试\n",
    "df = df_merge.assign(rolling_7=df_merge.groupby([1],as_index=False)[['购买量']].rolling(7,min_periods=1).sum().fillna(0).reset_index(0,drop=True))\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>日期</th>\n",
       "      <th>顾客编号</th>\n",
       "      <th>分类</th>\n",
       "      <th>购买量</th>\n",
       "      <th>近7日购买量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2021-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2021-01-06</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2021-01-12</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>5.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2021-01-13</td>\n",
       "      <td>1</td>\n",
       "      <td>B</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2021-01-10</td>\n",
       "      <td>1</td>\n",
       "      <td>C</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2021-01-12</td>\n",
       "      <td>1</td>\n",
       "      <td>C</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2021-01-13</td>\n",
       "      <td>1</td>\n",
       "      <td>C</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2021-01-15</td>\n",
       "      <td>1</td>\n",
       "      <td>C</td>\n",
       "      <td>4.0</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2021-01-14</td>\n",
       "      <td>1</td>\n",
       "      <td>D</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2021-01-02</td>\n",
       "      <td>1</td>\n",
       "      <td>E</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2021-01-10</td>\n",
       "      <td>1</td>\n",
       "      <td>E</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2021-01-13</td>\n",
       "      <td>1</td>\n",
       "      <td>E</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>2021-01-12</td>\n",
       "      <td>2</td>\n",
       "      <td>A</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>2021-01-18</td>\n",
       "      <td>2</td>\n",
       "      <td>A</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>2021-01-20</td>\n",
       "      <td>2</td>\n",
       "      <td>A</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>2021-01-14</td>\n",
       "      <td>2</td>\n",
       "      <td>B</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>2021-01-19</td>\n",
       "      <td>2</td>\n",
       "      <td>B</td>\n",
       "      <td>5.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>2021-01-10</td>\n",
       "      <td>2</td>\n",
       "      <td>D</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>2021-01-11</td>\n",
       "      <td>2</td>\n",
       "      <td>D</td>\n",
       "      <td>7.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>2021-01-09</td>\n",
       "      <td>2</td>\n",
       "      <td>E</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>2021-01-10</td>\n",
       "      <td>2</td>\n",
       "      <td>E</td>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>2021-01-17</td>\n",
       "      <td>2</td>\n",
       "      <td>E</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>2021-01-18</td>\n",
       "      <td>2</td>\n",
       "      <td>E</td>\n",
       "      <td>5.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>2021-01-20</td>\n",
       "      <td>2</td>\n",
       "      <td>E</td>\n",
       "      <td>4.0</td>\n",
       "      <td>11.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>2021-01-11</td>\n",
       "      <td>3</td>\n",
       "      <td>A</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>2021-01-17</td>\n",
       "      <td>3</td>\n",
       "      <td>A</td>\n",
       "      <td>2.0</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2021-01-10</td>\n",
       "      <td>3</td>\n",
       "      <td>B</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2021-01-11</td>\n",
       "      <td>3</td>\n",
       "      <td>B</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>2021-01-12</td>\n",
       "      <td>3</td>\n",
       "      <td>B</td>\n",
       "      <td>5.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>2021-01-15</td>\n",
       "      <td>3</td>\n",
       "      <td>B</td>\n",
       "      <td>3.0</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2021-01-23</td>\n",
       "      <td>3</td>\n",
       "      <td>B</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>2021-01-13</td>\n",
       "      <td>3</td>\n",
       "      <td>C</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>2021-01-15</td>\n",
       "      <td>3</td>\n",
       "      <td>D</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>2021-01-16</td>\n",
       "      <td>3</td>\n",
       "      <td>D</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>2021-01-19</td>\n",
       "      <td>3</td>\n",
       "      <td>D</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>2021-01-08</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>2021-01-12</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>3.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>2021-01-15</td>\n",
       "      <td>3</td>\n",
       "      <td>E</td>\n",
       "      <td>5.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            日期  顾客编号 分类  购买量  近7日购买量\n",
       "0   2021-01-01     1  A  4.0     4.0\n",
       "1   2021-01-06     1  A  4.0     8.0\n",
       "2   2021-01-12     1  A  5.0     9.0\n",
       "3   2021-01-13     1  B  3.0     3.0\n",
       "4   2021-01-10     1  C  4.0     4.0\n",
       "5   2021-01-12     1  C  3.0     7.0\n",
       "6   2021-01-13     1  C  3.0    10.0\n",
       "7   2021-01-15     1  C  4.0    14.0\n",
       "8   2021-01-14     1  D  3.0     3.0\n",
       "9   2021-01-02     1  E  1.0     1.0\n",
       "10  2021-01-10     1  E  3.0     3.0\n",
       "11  2021-01-13     1  E  1.0     4.0\n",
       "12  2021-01-12     2  A  6.0     6.0\n",
       "13  2021-01-18     2  A  2.0     8.0\n",
       "14  2021-01-20     2  A  5.0     7.0\n",
       "15  2021-01-14     2  B  4.0     4.0\n",
       "16  2021-01-19     2  B  5.0     9.0\n",
       "17  2021-01-10     2  D  2.0     2.0\n",
       "18  2021-01-11     2  D  7.0     9.0\n",
       "19  2021-01-09     2  E  3.0     3.0\n",
       "20  2021-01-10     2  E  4.0     7.0\n",
       "21  2021-01-17     2  E  2.0     2.0\n",
       "22  2021-01-18     2  E  5.0     7.0\n",
       "23  2021-01-20     2  E  4.0    11.0\n",
       "24  2021-01-11     3  A  5.0     5.0\n",
       "25  2021-01-17     3  A  2.0     7.0\n",
       "26  2021-01-10     3  B  3.0     3.0\n",
       "27  2021-01-11     3  B  1.0     4.0\n",
       "28  2021-01-12     3  B  5.0     9.0\n",
       "29  2021-01-15     3  B  3.0    12.0\n",
       "30  2021-01-23     3  B  5.0     5.0\n",
       "31  2021-01-13     3  C  2.0     2.0\n",
       "32  2021-01-15     3  D  2.0     2.0\n",
       "33  2021-01-16     3  D  1.0     3.0\n",
       "34  2021-01-19     3  D  5.0     8.0\n",
       "35  2021-01-08     3  E  5.0     5.0\n",
       "36  2021-01-12     3  E  3.0     8.0\n",
       "37  2021-01-15     3  E  5.0     8.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 剔除 购买量 为空的行\n",
    "df_result = df.dropna(subset=['购买量']).reset_index()\n",
    "df_result .drop(columns=['index'],inplace=True)\n",
    "df_result.rename(columns={0:'日期',1:'顾客编号',2:'分类','rolling_7':'近7日购买量'},inplace=True)\n",
    "df_result "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
